/*******************************************************************************

Ryan Hill: ryan.hill@kellogg.northwestern.edu
Carolyn Stein: carolyn_stein@berkeley.edu
Last modified: December 2024

Inputs: 	pdbe_Validation.csv (downloaded 10/25/19)

Outputs: 	clean_validation.dta

*******************************************************************************/

clear all
import delimited using "${data_raw}PDB/pdbe_Validation.csv"

* Discard the v1 column (presumably an unnamed index column in the CSV --
* confirm) and store the structure identifier in upper case under the
* name structureId, placed as the first variable.
drop v1
generate structureId = upper(structureid)
order structureId, first
drop structureid

* Retain only the identifier plus the clashscore and percent-outlier
* measures (the variables with decent coverage).
keep structureId clash* percent*

* Shorten the validation variable names. Each long prefix maps to the short
* prefix in the same position of the second list, and the three suffixes
* map as: absolute -> Abs, rawvalue -> Raw, relative -> Rel.
local longNames  clashscore percentrsrzoutliers percentramaoutliers percentrotaoutliers
local shortNames clash rsrz rama rota
local nMetrics : word count `longNames'

forvalues i = 1/`nMetrics' {
	local long  : word `i' of `longNames'
	local short : word `i' of `shortNames'

	rename `long'absolute `short'Abs
	rename `long'rawvalue `short'Raw
	rename `long'relative `short'Rel
}

* Write the cleaned dataset, overwriting any existing copy.
save "${data_clean}clean_validation.dta", replace
